{
/*
 * Argon2 reference source code package - reference C implementations
 *
 * Copyright 2015
 * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
 *
 * Pascal translation in 2018 by Alexander Koblov (alexx2000@mail.ru)
 *
 * You may use this work under the terms of a Creative Commons CC0 1.0
 * License/Waiver or the Apache Public License 2.0, at your option. The terms of
 * these licenses can be found at:
 *
 * - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0
 * - Apache 2.0        : http://www.apache.org/licenses/LICENSE-2.0
 *
 * You should have received a copy of both of these licenses along with this
 * software. If not, they may be obtained at the above URLs.
 */
}

unit Argon2;

{$mode objfpc}{$H+}
{$define USE_MTPROCS}
{.$define GENKAT}

interface

uses
  CTypes, DCblake2;

const
  //* Number of synchronization points between lanes per pass. */
  //* Each lane is split into this many slices (segments) per pass. */
  ARGON2_SYNC_POINTS = cuint32(4);

  //* Bit flags for Targon2_context.flags that select which input buffers */
  //* are securely wiped after they have been hashed (default = no wipe). */
  ARGON2_DEFAULT_FLAGS = cuint32(0);
  //* Wipe the password buffer and reset pwdlen to 0 after pre-hashing. */
  ARGON2_FLAG_CLEAR_PASSWORD = (cuint32(1) shl 0);
  //* Wipe the secret buffer and reset secretlen to 0 after pre-hashing. */
  ARGON2_FLAG_CLEAR_SECRET = (cuint32(1) shl 1);

const
  //* Error codes returned by the functions of this unit. */
  //* Success. */
  ARGON2_OK = 0;
  //* A buffer could not be allocated (or its size computation overflowed). */
  ARGON2_MEMORY_ALLOCATION_ERROR = -22;
  //* An argument was rejected (e.g. nil instance or zero lanes). */
  ARGON2_INCORRECT_PARAMETER = -25;


type
  //* Full set of inputs and the output buffer of one Argon2 computation. */
  Pargon2_context = ^Targon2_context;
  Targon2_context = record
    out_: pcuint8;    //* output (tag) buffer, receives outlen bytes */
    outlen: cuint32;  //* digest length in bytes */

    pwd: pcuint8;    //* password array (may be wiped, see flags) */
    pwdlen: cuint32; //* password length in bytes */

    salt: pcuint8;    //* salt array */
    saltlen: cuint32; //* salt length in bytes */

    secret: pcuint8;    //* key array (may be wiped, see flags) */
    secretlen: cuint32; //* key length in bytes */

    ad: pcuint8;    //* associated data array */
    adlen: cuint32; //* associated data length in bytes */

    t_cost: cuint32;  //* number of passes over the memory */
    m_cost: cuint32;  //* amount of memory requested, in 1 KiB blocks */
    lanes: cuint32;   //* number of lanes */
    threads: cuint32; //* maximum number of threads (clamped to lanes) */

    version: cuint32; //* version number, a Targon2_version ordinal */

    flags: cuint32; //* bit mask of ARGON2_FLAG_* options */
  end;

  //* Argon2 primitive type; the ordinal value is mixed into the hash. */
  Targon2_type = (
    Argon2_d = 0,   //* data-dependent addressing */
    Argon2_i = 1,   //* data-independent addressing */
    Argon2_id = 2   //* data-independent for the first half of pass 0 */
  );

  //* Version of the algorithm; the ordinal value is mixed into the hash. */
  Targon2_version = (
    ARGON2_VERSION_10 = $10, //* blocks are overwritten on every pass */
    ARGON2_VERSION_13 = $13, //* blocks are XORed over from pass 1 on */
    ARGON2_VERSION_NUMBER = ARGON2_VERSION_13 //* current default version */
  );

(*
 * Derives hashlen bytes with Argon2id, the current default version, no
 * secret key and no associated data.
 * @param t_cost      number of passes
 * @param m_cost      memory size in 1 KiB blocks
 * @param parallelism number of lanes (also used as the thread count)
 * @param pwd, pwdlen   password bytes and their length
 * @param salt, saltlen salt bytes and their length
 * @param hash        destination buffer of at least hashlen bytes
 * @return ARGON2_OK on success, otherwise a negative ARGON2_* error code
 *)
function argon2id_kdf(const t_cost, m_cost, parallelism: cuint32;
                      const pwd: pansichar; const pwdlen: csize_t;
                      const salt: pansichar; const saltlen: csize_t;
                      hash: Pointer; const hashlen: csize_t): cint;

(*
 * General Argon2 entry point: hashes the password with the given salt,
 * optional secret key and optional associated data.
 * The raw tag is copied to hash only when the computation succeeded and
 * hash is not nil.
 * @param type_   Argon2 variant (d, i or id)
 * @param version algorithm version to compute
 * @return ARGON2_OK on success, otherwise a negative ARGON2_* error code
 *)
function argon2_hash(const t_cost, m_cost, parallelism: cuint32;
                     const pwd: pansichar; const pwdlen: csize_t;
                     const salt: pansichar; const saltlen: csize_t;
                     const secret: pansichar; const secretlen: csize_t;
                     const ad: pansichar; const adlen: csize_t;
                     hash: Pointer; const hashlen: csize_t;
                     type_: Targon2_type; version: Targon2_version): cint;

(*
 * Runs the built-in known-answer vectors, writing a report to standard
 * output. Returns True when every executed vector matched.
 *)
function argon2_selftest: Boolean;

implementation

{$R-}{$Q-}

uses
  Math, Hash, SysUtils, StrUtils
{$IF DEFINED(USE_MTPROCS)}
  , MTProcs
{$ENDIF}
  ;

//**********************Argon2 internal constants*******************************/

const
  //* Memory block size in bytes */
  ARGON2_BLOCK_SIZE = 1024;
  //* Number of 64-bit words in one memory block */
  ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE div 8;

  (* Number of pseudo-random values produced by one address-block generation
     (see next_addresses) when data-independent addressing is used to pick
     reference block positions *)
  ARGON2_ADDRESSES_IN_BLOCK = 128;

  //* Pre-hashing digest length (H0) in bytes ... */
  ARGON2_PREHASH_DIGEST_LENGTH = 64;
  //* ... and its extension: H0 followed by two 32-bit counters (8 bytes) */
  ARGON2_PREHASH_SEED_LENGTH = 72;

//*************************Argon2 internal data types***********************/
type
  (*
   * Structure for the (1KB) memory block implemented as 128 64-bit words.
   * Memory blocks can be copied, XORed. Internal words can be accessed by [] (no
   * bounds checking).
   *)
  Pblock = ^Tblock;
  Tblock = packed record
    v: packed array [0..ARGON2_QWORDS_IN_BLOCK-1] of cuint64;
  end;

  (*
   * Argon2 instance: memory pointer, number of passes, amount of memory, type,
   * and derived values.
   * Used to evaluate the number and location of blocks to construct in each
   * thread
   *)
  Pargon2_instance_t = ^Targon2_instance_t;
  Targon2_instance_t = record
    memory: Pblock;          //* Memory pointer (array of memory_blocks blocks) */
    version: Targon2_version; //* Algorithm version being computed */
    passes: cuint32;        //* Number of passes */
    memory_blocks: cuint32; //* Number of blocks in memory */
    segment_length: cuint32; //* Blocks per segment (one slice of one lane) */
    lane_length: cuint32;    //* Blocks per lane = segment_length * ARGON2_SYNC_POINTS */
    lanes: cuint32;          //* Number of lanes */
    threads: cuint32;        //* Requested thread count, clamped to lanes */
    type_: Targon2_type;     //* Argon2 variant */
    print_internals: cint; //* whether to print the memory blocks (not used by the code in this unit) */
    context_ptr: Pargon2_context; //* points back to original context */
  end;

  (*
   * Argon2 position: where we construct the block right now. Used to distribute
   * work between threads.
   *)
  Pargon2_position_t = ^Targon2_position_t;
  Targon2_position_t = record
    pass: cuint32;   //* current pass number, starting at 0 */
    slice: cuint8;   //* current slice, 0 .. ARGON2_SYNC_POINTS - 1 */
    index: cuint32;  //* index of the block inside the current segment */
    instance_ptr: Pargon2_instance_t; //* instance the position belongs to */
  end;

{$IFDEF GENKAT}

(*
 * Known-answer-test helper: prints the parameters, the password and the salt
 * of a run to standard output. Does nothing when blockhash or context is nil.
 * The secret, associated data and pre-hashing digest dumps of the C original
 * are not ported yet (see the C excerpt kept at the end of the body).
 *)
procedure initial_kat(const blockhash: pcuint8; const context: Pargon2_context;
                      type_: Targon2_type);
var
  i: cuint32;
begin
  if (blockhash <> nil) and (context <> nil) then
  begin
    WriteLn('=======================================');

    WriteLn(Format('%d version number %d', [type_, context^.version]));

    WriteLn('=======================================');

    WriteLn(Format('Memory: %u KiB, Iterations: %u, Parallelism: %u lanes, Tag length: %u bytes',
             [context^.m_cost, context^.t_cost, context^.lanes, context^.outlen]));

    Write(Format('Password[%u]: ', [context^.pwdlen]));

    if (context^.flags and ARGON2_FLAG_CLEAR_PASSWORD <> 0) then
    begin
      WriteLn('CLEARED');
    end
    else begin
      // The counter is unsigned: "0 to pwdlen - 1" would wrap around for an
      // empty password, so the loop is skipped explicitly in that case.
      if (context^.pwdlen > 0) then
      begin
        for i := 0 to context^.pwdlen - 1 do
          Write(Format('%2.2x ', [context^.pwd[i]]));
      end;
      WriteLn;
    end;

    Write(Format('Salt[%u]: ', [context^.saltlen]));

    // Same unsigned wrap-around guard for an empty salt.
    if (context^.saltlen > 0) then
    begin
      for i := 0 to context^.saltlen - 1 do begin
        Write(Format('%2.2x ', [context^.salt[i]]));
      end;
    end;
    WriteLn;

    (*
    printf("Secret[%u]: ", context->secretlen);

    if (context->flags & ARGON2_FLAG_CLEAR_SECRET) {
        printf("CLEARED\n");
    } else {
        for (i = 0; i < context->secretlen; ++i) {
            printf("%2.2x ", ((unsigned char )context->secret)[i]);
        }

        printf("\n");
    }

    printf("Associated data[%u]: ", context->adlen);

    for (i = 0; i < context->adlen; ++i) {
        printf("%2.2x ", ((unsigned char )context->ad)[i]);
    }

    printf("\n");

    printf("Pre-hashing digest: ");

    for (i = 0; i < ARGON2_PREHASH_DIGEST_LENGTH; ++i) {
        printf("%2.2x ", ((unsigned char )blockhash)[i]);
    }

    printf("\n"); *)
  end;
end;

(*
 * Known-answer-test helper: prints the tag as space separated hex bytes on
 * one line prefixed with "Tag: ". Does nothing when out_ is nil.
 * @param out_   tag bytes
 * @param outlen number of bytes to print (may be 0)
 *)
procedure print_tag(const out_: pcuint8; outlen: cuint32);
var
  i: cuint32;
begin
  if (out_ <> nil) then
  begin
    Write('Tag: ');
    // The counter is unsigned: "0 to outlen - 1" would wrap around for an
    // empty tag, so the loop is skipped explicitly in that case.
    if (outlen > 0) then
    begin
      for i := 0 to outlen - 1 do begin
        Write(Format('%2.2x ', [out_[i]]));
      end;
    end;
    WriteLn;
  end;
end;

(*
 * Known-answer-test helper: dumps the memory after a pass to standard output.
 * When the memory has more blocks than a block has words only the first word
 * of every block is printed, otherwise all words of every block.
 * Does nothing when instance is nil.
 *)
procedure internal_kat(const instance: Pargon2_instance_t; pass: cuint32);
var
  block_no, word_no: cuint32;
  words_per_block: cuint32;
begin
  if (instance = nil) then Exit;

  WriteLn(Format('After pass %u:', [pass]));

  // The amount of words shown per block does not depend on the block.
  if (instance^.memory_blocks > ARGON2_QWORDS_IN_BLOCK) then
    words_per_block := 1
  else
    words_per_block := ARGON2_QWORDS_IN_BLOCK;

  for block_no := 0 to instance^.memory_blocks - 1 do
    for word_no := 0 to words_per_block - 1 do
      WriteLn(Format('Block %.4u [%3u]: %s', [block_no, word_no,
              HexStr(instance^.memory[block_no].v[word_no], 16)]));
end;

{$ENDIF}

//* Reads the 32-bit little-endian word stored at src as a native value. */
function load32( const src: Pointer ): cuint32; inline;
var
  raw: cuint32;
begin
  raw := pcuint32(src)^;
  // Little-endian <-> native is the same byte swap in both directions.
  Result := LEtoN(raw);
end;

//* Reads the 64-bit little-endian word stored at src as a native value. */
function load64( const src: pointer ): cuint64; inline;
var
  raw: cuint64;
begin
  raw := pcuint64(src)^;
  // Little-endian <-> native is the same byte swap in both directions.
  Result := LEtoN(raw);
end;

//* Writes w to dst as a 32-bit little-endian word. */
procedure store32( dst: pointer; w: cuint32 ); inline;
var
  le: cuint32;
begin
  // Native <-> little-endian is the same byte swap in both directions.
  le := NtoLE(w);
  pcuint32(dst)^ := le;
end;

//* Writes w to dst as a 64-bit little-endian word. */
procedure store64( dst: pointer; w: cuint64 ); inline;
var
  le: cuint64;
begin
  // Native <-> little-endian is the same byte swap in both directions.
  le := NtoLE(w);
  pcuint64(dst)^ := le;
end;

//* designed by the Lyra PHC team */
(*
 * BlaMka mixing primitive: x + y + 2 * lo32(x) * lo32(y), computed modulo
 * 2^64 (the unit is compiled with overflow checking switched off).
 *)
function fBlaMka(x, y: cuint64): cuint64; inline;
var
  lo_product: cuint64;
begin
  // Explicit casts to 32 bit keep only the low halves of both operands.
  lo_product := cuint64(cuint32(x)) * cuint64(cuint32(y));
  Result := x + y + 2 * lo_product;
end;

(*
 * Quarter round of the permutation: BLAKE2b's G with the additions replaced
 * by fBlaMka and without message words. Updates a, b, c, d in place.
 *)
procedure G(var a, b, c, d: cuint64); inline;
begin
  // First half: rotations by 32 and 24.
  a := fBlaMka(a, b);
  d := d xor a;
  d := RorQWord(d, 32);
  c := fBlaMka(c, d);
  b := b xor c;
  b := RorQWord(b, 24);

  // Second half: rotations by 16 and 63.
  a := fBlaMka(a, b);
  d := d xor a;
  d := RorQWord(d, 16);
  c := fBlaMka(c, d);
  b := b xor c;
  b := RorQWord(b, 63);
end;

(*
 * One BLAKE2 round without message words over sixteen 64-bit words seen as
 * a 4x4 matrix (v0..v3 first row, v12..v15 last row). All words are updated
 * in place.
 *)
procedure BLAKE2_ROUND_NOMSG(var v0, v1, v2, v3, v4, v5, v6, v7,
                             v8, v9, v10, v11, v12, v13, v14, v15: cuint64); inline;
begin
  // Column step
  G(v0, v4, v8,  v12);
  G(v1, v5, v9,  v13);
  G(v2, v6, v10, v14);
  G(v3, v7, v11, v15);

  // Diagonal step
  G(v0, v5, v10, v15);
  G(v1, v6, v11, v12);
  G(v2, v7, v8,  v13);
  G(v3, v4, v9,  v14);
end;

//***************Instance and Position constructors**********/
//* Sets every byte of the block b points to to the value in_. */
procedure init_block_value(b: Pblock; in_: cuint8); inline;
begin
  FillByte(b^, SizeOf(b^), in_);
end;

//* Copies the whole block src points to into the block dst points to. */
procedure copy_block(dst: Pblock; const src: Pblock); inline;
begin
  dst^ := src^;
end;

//* XORs the block src points to into the block dst points to, word by word. */
procedure xor_block(dst: Pblock; const src: Pblock);
var
  target, source: pcuint64;
  remaining: cint;
begin
  target := @dst^.v[0];
  source := @src^.v[0];
  for remaining := ARGON2_QWORDS_IN_BLOCK downto 1 do
  begin
    target^ := target^ xor source^;
    Inc(target);
    Inc(source);
  end;
end;

(*
 * Fills the block dst points to from ARGON2_BLOCK_SIZE serialized bytes.
 * The bytes hold the words in little-endian order, so on big-endian targets
 * every word is converted to native order after the copy (a raw copy alone
 * is only correct on little-endian targets).
 * @param dst   block to fill
 * @param input source of SizeOf(Tblock) bytes, no alignment required
 *)
procedure load_block(dst: Pblock; const input: PByte); inline;
{$IFDEF ENDIAN_BIG}
var
  i: cint;
{$ENDIF}
begin
  Move(input^, dst^, SizeOf(Tblock));
{$IFDEF ENDIAN_BIG}
  for i := 0 to ARGON2_QWORDS_IN_BLOCK - 1 do
    dst^.v[i] := LEtoN(dst^.v[i]);
{$ENDIF}
end;

(*
 * Serializes the block src points to into ARGON2_BLOCK_SIZE bytes, each word
 * in little-endian order. On big-endian targets the words are converted one
 * by one; on little-endian targets a raw copy already has the right layout.
 * @param output destination of SizeOf(Tblock) bytes, no alignment required
 * @param src    block to serialize
 *)
procedure store_block(output: PByte; const src: Pblock); inline;
{$IFDEF ENDIAN_BIG}
var
  i: cint;
  w: cuint64;
{$ENDIF}
begin
{$IFDEF ENDIAN_BIG}
  for i := 0 to ARGON2_QWORDS_IN_BLOCK - 1 do
  begin
    w := NtoLE(src^.v[i]);
    // Byte-wise copy: output may not be aligned for 64-bit stores.
    Move(w, output[i * SizeOf(w)], SizeOf(w));
  end;
{$ELSE}
  Move(src^, output^, SizeOf(Tblock));
{$ENDIF}
end;

//***************Memory functions*****************/

(*
 * Overwrites n bytes starting at v with zeros.
 * v is dereferenced unconditionally, so it must not be nil when n > 0
 * (clear_internal_memory is the nil-tolerant wrapper).
 * NOTE(review): the OPTIMIZATION OFF/DEFAULT pair around the body is
 * presumably meant to keep the compiler from removing the wipe; whether the
 * directive takes effect when placed after the procedure header should be
 * confirmed against the compiler documentation.
 *)
procedure secure_wipe_memory(v: Pointer; n: csize_t);
{$OPTIMIZATION OFF}
begin
  FillChar(v^, n, 0);
end;
{$OPTIMIZATION DEFAULT}

//* Zeroes n bytes at v via secure_wipe_memory; a nil pointer is ignored. */
procedure clear_internal_memory(v: Pointer; n: csize_t);
begin
  if (v = nil) then Exit;
  secure_wipe_memory(v, n);
end;

(*
 * Allocates num * size bytes and stores the pointer in memory^.
 * @param memory receives the allocated pointer; must not be nil
 * @param num    number of elements
 * @param size   size of one element in bytes
 * @return ARGON2_OK on success, ARGON2_MEMORY_ALLOCATION_ERROR when memory
 *         is nil, when num * size overflows or when the heap cannot satisfy
 *         the request
 *)
function allocate_memory(memory: PPByte; num, size: csize_t): cint;
var
  memory_size: csize_t;
begin
  if (memory = nil) then begin
      Exit(ARGON2_MEMORY_ALLOCATION_ERROR);
  end;

  memory_size := num * size;

  //* Check for multiplication overflow */
  if (size <> 0) and (memory_size div size <> num) then begin
      Exit(ARGON2_MEMORY_ALLOCATION_ERROR);
  end;

  // With the default heap settings GetMem raises EOutOfMemory instead of
  // returning nil, so the exception is turned into the nil result that the
  // check below (and the callers) expect.
  try
    memory^ := GetMem(memory_size);
  except
    on EOutOfMemory do
      memory^ := nil;
  end;

  if (memory^ = nil) then begin
      Exit(ARGON2_MEMORY_ALLOCATION_ERROR);
  end;

  Result:= ARGON2_OK;
end;

(*
 * Wipes num * size bytes at memory and then releases the buffer.
 * A nil pointer is tolerated by the wipe.
 *)
procedure free_memory(memory: pcuint8; num, size: csize_t);
begin
  // Zero the contents first so that no block data stays on the heap.
  clear_internal_memory(memory, num * size);
  FreeMem(memory);
end;

(*
 * Computes the compression function on prev_block and ref_block and writes
 * the result to next_block, optionally XORing it over the previous contents
 * of next_block.
 * Both inputs are copied into locals before next_block is written, so
 * next_block may be the same block as one of the inputs.
 * @param prev_block Pointer to the previous block
 * @param ref_block Pointer to the reference block
 * @param next_block Pointer to the block to be constructed; its current
 *        contents are only read when with_xor is true
 * @param with_xor Whether to XOR into the new block (true) or just
 *        overwrite it (false)
 * @pre all block pointers must be valid
 *)
procedure fill_block(const prev_block: Pblock; const ref_block: Pblock;
                       next_block: Pblock; with_xor: boolean);
var
  i: cuint32;
  blockR, block_tmp: Tblock;
begin
  copy_block(@blockR, ref_block);
  xor_block(@blockR, prev_block);
  copy_block(@block_tmp, @blockR);
  //* Now blockR = ref_block xor prev_block and block_tmp = ref_block xor prev_block */
  if (with_xor) then
  begin
      //* Saving the next block contents for XOR over: */
      xor_block(@block_tmp, next_block);
      (* Now blockR = ref_block xor prev_block and
         block_tmp = ref_block xor prev_block xor next_block *)
  end;

  (* Apply the Blake2 round to 16 consecutive 64-bit words at a time:
     (0,1,...,15), then (16,17,..31)... finally (112,113,...127) *)
  for i := 0 to 7 do
  begin
      BLAKE2_ROUND_NOMSG(
          blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
          blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
          blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
          blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
          blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
          blockR.v[16 * i + 15]);
  end;

  (* Apply the Blake2 round to word pairs taken with a stride of 16:
     (0,1,16,17,...112,113), then (2,3,18,19,...,114,115).. finally
     (14,15,30,31,...,126,127) *)
  for i := 0 to 7 do
  begin
      BLAKE2_ROUND_NOMSG(
          blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
          blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
          blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
          blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
          blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
          blockR.v[2 * i + 113]);
  end;

  //* Result = permuted blockR xor the saved pre-permutation value */
  copy_block(next_block, @block_tmp);
  xor_block(next_block, @blockR);
end;

(*
 * One-shot unkeyed BLAKE2b: hashes inlen bytes at in_ into outlen bytes at
 * out_.
 * @return 0 on success, -1 when the state could not be initialised for the
 *         requested output length
 *)
function blake2b(out_: pcuint8; outlen: csize_t; const in_: pcuint8; inlen: csize_t): cint;
var
  state: blake2b_state;
begin
  if (blake2b_init(@state, outlen) <> 0) then
    Exit(-1);

  blake2b_update(@state, in_, inlen);
  blake2b_final(@state, out_, outlen);
  Result:= 0;
end;

(*
 * Variable-length hash built on BLAKE2b: writes outlen bytes to pout,
 * derived from the 32-bit little-endian encoding of outlen followed by the
 * inlen input bytes.
 * Up to BLAKE2B_OUTBYTES bytes are produced by a single BLAKE2b call; longer
 * outputs are produced as a chain of BLAKE2b calls of which only the first
 * half of every intermediate digest is emitted.
 * The return values of the BLAKE2b calls are not checked here.
 *)
procedure blake2b_long(pout: pointer; outlen: csize_t; const in_: pointer; inlen: csize_t);
var
  out_: pcuint8;
  toproduce: cuint32;
  blake_state: blake2b_state;
  outlen_bytes: array [0..sizeof(cuint32)-1] of cuint8;
  in_buffer: array[0..Pred(BLAKE2B_OUTBYTES)] of cuint8;
  out_buffer: array[0..Pred(BLAKE2B_OUTBYTES)] of cuint8;
begin
  out_:= pout;
  //* Ensure little-endian byte order! */
  store32(@outlen_bytes[0], cuint32(outlen));

  if (outlen <= BLAKE2B_OUTBYTES) then
  begin
    //* Short output: a single hash of outlen || input */
    blake2b_init(@blake_state, outlen);
    blake2b_update(@blake_state, outlen_bytes, sizeof(outlen_bytes));
    blake2b_update(@blake_state, in_, inlen);
    blake2b_final(@blake_state, out_, outlen);
  end
  else begin
    //* Long output: first digest is the full-size hash of outlen || input */
    blake2b_init(@blake_state, BLAKE2B_OUTBYTES);
    blake2b_update(@blake_state, outlen_bytes, sizeof(outlen_bytes));
    blake2b_update(@blake_state, in_, inlen);
    blake2b_final(@blake_state, out_buffer, BLAKE2B_OUTBYTES);
    //* Emit only the first half of the digest */
    Move(out_buffer[0], out_^, BLAKE2B_OUTBYTES div 2);
    out_ += BLAKE2B_OUTBYTES div 2;
    toproduce := cuint32(outlen) - BLAKE2B_OUTBYTES div 2;

    //* Each further digest is the hash of the previous full digest */
    while (toproduce > BLAKE2B_OUTBYTES) do
    begin
      Move(out_buffer[0], in_buffer[0], BLAKE2B_OUTBYTES);
      blake2b(out_buffer, BLAKE2B_OUTBYTES, in_buffer, BLAKE2B_OUTBYTES);
      Move(out_buffer[0], out_^, BLAKE2B_OUTBYTES div 2);
      out_ += BLAKE2B_OUTBYTES div 2;
      toproduce -= BLAKE2B_OUTBYTES div 2;
    end;

    //* Last digest has exactly the remaining length and is emitted in full */
    Move(out_buffer[0], in_buffer[0], BLAKE2B_OUTBYTES);
    blake2b(out_buffer, toproduce, in_buffer, BLAKE2B_OUTBYTES);
    Move(out_buffer[0], out_^, toproduce);
  end;
  //* Only the hash state is wiped; in_buffer and out_buffer are left as is */
  clear_internal_memory(@blake_state, sizeof(blake_state));
end;

(*
 * Produces the next block of pseudo-random addresses for data-independent
 * addressing: advances the counter in word 6 of input_block and applies the
 * compression function twice with zero_block as the first input.
 *)
procedure next_addresses(address_block, input_block: Pblock;
                           const zero_block: Pblock);
begin
  // Advance the block counter first
  input_block^.v[6] := input_block^.v[6] + 1;

  // address_block = compress(zero, compress(zero, input_block))
  fill_block(zero_block, input_block, address_block, false);
  fill_block(zero_block, address_block, address_block, false);
end;

(*
 * Maps a 32-bit pseudo-random value to the index, inside a lane, of the
 * block to reference for the block at the given position.
 * @param instance    supplies segment_length and lane_length
 * @param position    pass, slice and in-segment index of the block being built
 * @param pseudo_rand 32-bit pseudo-random value
 * @param same_lane   whether the referenced lane is the lane being filled
 * @return block index in the range 0 .. lane_length - 1
 * The size computations rely on unsigned wrap-around (the unit is compiled
 * without range and overflow checks).
 *)
function index_alpha(const instance: Pargon2_instance_t;
                     const position: Pargon2_position_t; pseudo_rand: cuint32;
                     same_lane: boolean): cuint32;
var
  reference_area_size: cuint32;
  relative_position: cuint64;
  start_position, absolute_position: cuint32;
begin
  (*
   * Pass 0:
   *      This lane : all already finished segments plus already constructed
   * blocks in this segment
   *      Other lanes : all already finished segments
   * Pass 1+:
   *      This lane : (SYNC_POINTS - 1) last segments plus already constructed
   * blocks in this segment
   *      Other lanes : (SYNC_POINTS - 1) last segments
   *)

  if (0 = position^.pass) then
  begin
      //* First pass */
      if (0 = position^.slice) then
      begin
          //* First slice */
          reference_area_size :=
              position^.index - 1; //* all but the previous */
      end
      else begin
          if (same_lane) then
          begin
              //* The same lane => add current segment */
              reference_area_size :=
                  position^.slice * instance^.segment_length +
                  position^.index - 1;
          end
          else begin
              //* Other lane: one block less when we are at a segment start */
              reference_area_size :=
                  position^.slice * instance^.segment_length +
                  IfThen((position^.index = 0), (-1), 0);
          end;
      end
  end
  else begin
      //* Second and later passes */
      if (same_lane) then
      begin
          reference_area_size := instance^.lane_length -
                                 instance^.segment_length + position^.index - 1;
      end
      else begin
          reference_area_size := instance^.lane_length -
                                 instance^.segment_length +
                                 IfThen((position^.index = 0), (-1), 0);
      end;
  end;

  (* 1.2.4. Mapping pseudo_rand to 0..<reference_area_size-1> and produce
   * relative position; "*" and "shr" have equal precedence and associate to
   * the left, so each product is formed in 64 bit before it is shifted *)
  relative_position := pseudo_rand;
  relative_position := relative_position * relative_position shr 32;
  relative_position := reference_area_size - 1 -
                       (reference_area_size * relative_position shr 32);

  //* 1.2.5 Computing starting position */
  start_position := 0;

  if (0 <> position^.pass) then
  begin
      //* The window starts right after the current slice, wrapping to 0 */
      start_position := IfThen(position^.slice = ARGON2_SYNC_POINTS - 1,
                               0,
                               (position^.slice + 1) * instance^.segment_length);
  end;

  //* 1.2.6. Computing absolute position */
  absolute_position := (start_position + relative_position) mod
                      instance^.lane_length; //* absolute position */
  Result:= absolute_position;
end;

(*
 * Fills one segment (one slice of one lane) of the memory.
 * The signature matches what fill_memory_blocks needs for both the direct
 * call and the thread pool call.
 * @param position_lane index of the lane to fill
 * @param Data          pointer to a Targon2_position_t holding pass, slice and
 *                      instance; it is copied, so the caller's record is not
 *                      modified. Nothing is done when Data is nil.
 * @param Item          not used
 *)
procedure fill_segment(position_lane: PtrInt; Data: Pointer; {%H-}Item: TObject);
var
  ref_block: Pblock = nil;
  curr_block: Pblock = nil;
  address_block, input_block, zero_block: Tblock;
  pseudo_rand, ref_index, ref_lane: cuint64;
  prev_offset, curr_offset: cuint32;
  starting_index: cuint32;
  i: cuint32;
  data_independent_addressing: boolean;
  position: Targon2_position_t;
  //* Alias of position.instance_ptr: only valid once position is assigned */
  instance: Pargon2_instance_t absolute position.instance_ptr;
begin
  if (Data = nil) then Exit;

  //* Private copy of the position; this also makes "instance" valid */
  position := Pargon2_position_t(Data)^;

  //* Argon2i always, Argon2id only in the first half of the first pass */
  data_independent_addressing :=
      (instance^.type_ = Argon2_i) or
      ((instance^.type_ = Argon2_id) and (position.pass = 0) and
       (position.slice < ARGON2_SYNC_POINTS div 2));

  if (data_independent_addressing) then
  begin
    init_block_value(@zero_block, 0);
    init_block_value(@input_block, 0);

    //* Input of the address generator; word 6 is the counter that */
    //* next_addresses increments */
    input_block.v[0] := position.pass;
    input_block.v[1] := position_lane;
    input_block.v[2] := position.slice;
    input_block.v[3] := instance^.memory_blocks;
    input_block.v[4] := instance^.passes;
    input_block.v[5] := cuint64(instance^.type_);
  end;

  position.index := 0;
  starting_index := 0;

  if ((0 = position.pass) and (0 = position.slice)) then
  begin
    starting_index := 2; //* we have already generated the first two blocks */

    //* Don't forget to generate the first block of addresses: */
    if (data_independent_addressing) then begin
        next_addresses(@address_block, @input_block, @zero_block);
    end;
  end;

  //* Offset of the current block */
  curr_offset := position_lane * instance^.lane_length +
                position.slice * instance^.segment_length + starting_index;

  if (0 = curr_offset mod instance^.lane_length) then
  begin
      //* Last block in this lane */
      prev_offset := curr_offset + instance^.lane_length - 1;
  end
  else begin
      //* Previous block */
      prev_offset := curr_offset - 1;
  end;

  for i := starting_index to instance^.segment_length - 1 do
  begin
    //*1.1 Rotating prev_offset if needed */
    if (curr_offset mod instance^.lane_length = 1) then begin
        prev_offset := curr_offset - 1;
    end;

    //* 1.2 Computing the index of the reference block */
    //* 1.2.1 Taking pseudo-random value from the address block or from */
    //* the previous block */
    if (data_independent_addressing) then

    begin
        //* A new address block is needed every ARGON2_ADDRESSES_IN_BLOCK blocks */
        if (i mod ARGON2_ADDRESSES_IN_BLOCK = 0) then begin
            next_addresses(@address_block, @input_block, @zero_block);
        end;
        pseudo_rand := address_block.v[i mod ARGON2_ADDRESSES_IN_BLOCK];
    end
    else begin
        pseudo_rand := instance^.memory[prev_offset].v[0];
    end;

    //* 1.2.2 Computing the lane of the reference block (upper 32 bits) */
    ref_lane := ((pseudo_rand shr 32)) mod instance^.lanes;

    if ((position.pass = 0) and (position.slice = 0)) then begin
        //* Can not reference other lanes yet */
        ref_lane := position_lane;
    end;

    //* 1.2.3 Computing the number of possible reference block within the lane */
    //* (lower 32 bits of pseudo_rand select the block). */
    position.index := i;
    ref_index := index_alpha(instance, @position, pseudo_rand and $FFFFFFFF,
                            ref_lane = position_lane);

    //* 2 Creating a new block */
    ref_block :=
        instance^.memory + instance^.lane_length * ref_lane + ref_index;
    curr_block := instance^.memory + curr_offset;
    if (ARGON2_VERSION_10 = instance^.version) then begin
        //* version 1.2.1 and earlier: overwrite, not XOR */
        fill_block(instance^.memory + prev_offset, ref_block, curr_block, false);
    end
    else begin
        //* Later versions: overwrite in pass 0, XOR over from pass 1 on */
        if (0 = position.pass) then begin
            fill_block(instance^.memory + prev_offset, ref_block,
                       curr_block, false);
        end
        else begin
            fill_block(instance^.memory + prev_offset, ref_block,
                       curr_block, true);
        end;
    end;

    Inc(curr_offset);
    Inc(prev_offset);
  end;
end;

(*
 * Produces the tag: XORs the last block of every lane, hashes the result
 * into context^.out_ (context^.outlen bytes) and then wipes and releases the
 * block memory of the instance.
 * Does nothing when context or instance is nil.
 *)
procedure finalize(const context: Pargon2_context; instance: Pargon2_instance_t);
var
  lane: cuint32;
  lane_tail_index: cuint32;
  final_block: Tblock;
  final_bytes: array [0..ARGON2_BLOCK_SIZE-1] of cuint8;
begin
  if (context = nil) or (instance = nil) then Exit;

  // Start from the last block of lane 0 ...
  copy_block(@final_block, instance^.memory + instance^.lane_length - 1);

  // ... and fold in the last block of every other lane
  for lane := 1 to instance^.lanes - 1 do
  begin
    lane_tail_index := lane * instance^.lane_length + (instance^.lane_length - 1);
    xor_block(@final_block, instance^.memory + lane_tail_index);
  end;

  // Serialise and hash down to the requested tag length
  store_block(@final_bytes[0], @final_block);
  blake2b_long(context^.out_, context^.outlen, @final_bytes[0],
               ARGON2_BLOCK_SIZE);

  // Neither copy of the final block may stay on the stack
  clear_internal_memory(@final_block.v[0], ARGON2_BLOCK_SIZE);
  clear_internal_memory(@final_bytes[0], ARGON2_BLOCK_SIZE);

{$IFDEF GENKAT}
  print_tag(context^.out_, context^.outlen);
{$ENDIF}

  free_memory(pcuint8(instance^.memory),
              instance^.memory_blocks, sizeof(Tblock));
end;

(*
 * Fills the whole memory: for every pass and every slice all lanes are
 * filled, in parallel through the thread pool when USE_MTPROCS is defined
 * and there is more than one lane, sequentially otherwise.
 * @return ARGON2_OK on success, ARGON2_INCORRECT_PARAMETER when instance is
 *         nil or has no lanes or no passes
 *)
function fill_memory_blocks(instance: Pargon2_instance_t): cint;
var
  r, s, l: cuint32;
  position: Targon2_position_t;
begin
  // passes = 0 has to be rejected as well: the pass counter is unsigned, so
  // "0 to passes - 1" would wrap around instead of running zero times.
  if (instance = nil) or (instance^.lanes = 0) or (instance^.passes = 0) then begin
    Exit(ARGON2_INCORRECT_PARAMETER);
  end;
  position.instance_ptr:= instance;
  position.index:= 0; // overwritten by fill_segment, set for determinism
  for r := 0 to instance^.passes - 1 do
  begin
    position.pass:= r;
    for s := 0 to ARGON2_SYNC_POINTS - 1 do
    begin
      position.slice:= s;
      // Every fill_segment call copies "position", so sharing one record
      // between the workers of a slice is safe.
{$IF DEFINED(USE_MTPROCS)}
      if instance^.lanes > 1 then
        ProcThreadPool.DoParallel(TMTProcedure(@fill_segment), 0, instance^.lanes - 1, @position)
      else
{$ENDIF}
        for l := 0 to instance^.lanes - 1 do fill_segment(l, @position, nil);
    end;
{$IFDEF GENKAT}
    internal_kat(instance, r); ///* Print all memory blocks */
{$ENDIF}
  end;
  Result:= ARGON2_OK;
end;

(*
 * Creates the first two blocks of every lane from the pre-hashing digest.
 * blockhash must provide ARGON2_PREHASH_SEED_LENGTH bytes: the digest
 * followed by 8 bytes that are overwritten here with the block number (0 or
 * 1) and the lane number, both as 32-bit little-endian values.
 *)
procedure fill_first_blocks(blockhash: pcuint8; const instance: pargon2_instance_t);
var
  lane, block_no: cuint32;
  expanded: array[0..ARGON2_BLOCK_SIZE-1] of cuint8;
begin
  for lane := 0 to instance^.lanes - 1 do
  begin
    // The lane number stays the same for both blocks of the lane
    store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4, lane);

    for block_no := 0 to 1 do
    begin
      store32(blockhash + ARGON2_PREHASH_DIGEST_LENGTH, block_no);
      blake2b_long(@expanded[0], ARGON2_BLOCK_SIZE, blockhash,
                   ARGON2_PREHASH_SEED_LENGTH);
      load_block(@instance^.memory[lane * instance^.lane_length + block_no],
                 @expanded[0]);
    end;
  end;

  // The expanded block is derived from the secret inputs
  clear_internal_memory(@expanded[0], ARGON2_BLOCK_SIZE);
end;

(*
 * Computes the pre-hashing digest H0 (ARGON2_PREHASH_DIGEST_LENGTH bytes,
 * written to blockhash) over all parameters and inputs of the context.
 * Every value is hashed as a 32-bit little-endian word; every byte string is
 * hashed as its length followed by its bytes (bytes are skipped for a nil
 * pointer). Password and secret are wiped afterwards, and their lengths set
 * to 0, when the matching ARGON2_FLAG_CLEAR_* flag is set.
 * Does nothing when context or blockhash is nil.
 *)
procedure initial_hash(blockhash: pcuint8; context: Pargon2_context;
                       type_: Targon2_type);
var
  state: blake2b_state;

  //* Feeds one 32-bit value, little-endian encoded, into the hash. */
  procedure absorb_u32(w: cuint32);
  var
    encoded: array[0..sizeof(cuint32)-1] of cuint8;
  begin
    store32(@encoded[0], w);
    blake2b_update(@state, @encoded[0], sizeof(encoded));
  end;

  //* Feeds a length-prefixed byte string into the hash. */
  procedure absorb_field(bytes: pcuint8; len: cuint32);
  begin
    absorb_u32(len);
    if (bytes <> nil) then
      blake2b_update(@state, bytes, len);
  end;

begin
  if (context = nil) or (blockhash = nil) then Exit;

  blake2b_init(@state, ARGON2_PREHASH_DIGEST_LENGTH);

  // Fixed-size parameters
  absorb_u32(context^.lanes);
  absorb_u32(context^.outlen);
  absorb_u32(context^.m_cost);
  absorb_u32(context^.t_cost);
  absorb_u32(context^.version);
  absorb_u32(cuint32(type_));

  // Password, optionally wiped once it has been hashed
  absorb_field(context^.pwd, context^.pwdlen);
  if (context^.pwd <> nil) and
     (context^.flags and ARGON2_FLAG_CLEAR_PASSWORD <> 0) then
  begin
    secure_wipe_memory(context^.pwd, context^.pwdlen);
    context^.pwdlen := 0;
  end;

  // Salt
  absorb_field(context^.salt, context^.saltlen);

  // Secret key, optionally wiped once it has been hashed
  absorb_field(context^.secret, context^.secretlen);
  if (context^.secret <> nil) and
     (context^.flags and ARGON2_FLAG_CLEAR_SECRET <> 0) then
  begin
    secure_wipe_memory(context^.secret, context^.secretlen);
    context^.secretlen := 0;
  end;

  // Associated data
  absorb_field(context^.ad, context^.adlen);

  blake2b_final(@state, blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
end;

(*
 * Prepares an instance for filling: allocates the block memory, computes the
 * pre-hashing digest of the context and creates the first two blocks of
 * every lane.
 * @return ARGON2_OK on success, otherwise the error of allocate_memory
 *)
function initialize(instance: Pargon2_instance_t; context: Pargon2_context): cint;
var
  // H0 followed by 8 extra bytes used to derive the first blocks
  seed: array[0..ARGON2_PREHASH_SEED_LENGTH-1] of cuint8;
begin
  instance^.context_ptr := context;

  //* 1. Memory allocation */
  Result := allocate_memory(@(instance^.memory),
                            instance^.memory_blocks, sizeof(Tblock));
  if (Result = ARGON2_OK) then
  begin
    //* 2. Initial hashing of all inputs */
    initial_hash(@seed[0], context, instance^.type_);
    //* Zeroing 8 extra bytes */
    clear_internal_memory(@seed[ARGON2_PREHASH_DIGEST_LENGTH],
                          ARGON2_PREHASH_SEED_LENGTH -
                              ARGON2_PREHASH_DIGEST_LENGTH);

{$IFDEF GENKAT}
    initial_kat(@seed[0], context, instance^.type_);
{$ENDIF}

    //* 3. Creating first blocks, we always have at least two blocks in a slice */
    fill_first_blocks(@seed[0], instance);
    //* Clearing the hash */
    clear_internal_memory(@seed[0], ARGON2_PREHASH_SEED_LENGTH);
  end;
end;

(*
 * Runs the complete Argon2 computation described by context and writes the
 * tag to context^.out_.
 * @param context inputs, cost parameters and output buffer
 * @param type_   Argon2 variant
 * @return ARGON2_OK on success;
 *         ARGON2_INCORRECT_PARAMETER when context is nil, the output buffer
 *         is nil or empty, t_cost is 0 or lanes is outside 1 .. 2^24 - 1;
 *         otherwise the error reported by initialisation or filling
 *)
function argon2_ctx(context: Pargon2_context; type_: Targon2_type): cint;
const
  // Upper bound of the number of lanes; also keeps 8 * lanes within 32 bit
  MAX_LANES = cuint32($FFFFFF);
var
  memory_blocks, segment_length: cuint32;
  instance: Targon2_instance_t;
begin
  //* 1. Validate the inputs the code below depends on */
  // lanes = 0 would divide by zero and t_cost = 0 would wrap the unsigned
  // pass counter, so both are rejected instead of crashing.
  if (context = nil) then
    Exit(ARGON2_INCORRECT_PARAMETER);

  if (context^.out_ = nil) or (context^.outlen = 0) or
     (context^.t_cost = 0) or
     (context^.lanes = 0) or (context^.lanes > MAX_LANES) then
    Exit(ARGON2_INCORRECT_PARAMETER);

  //* 2. Align memory size */
  //* Minimum memory_blocks = 8L blocks, where L is the number of lanes */
  memory_blocks := context^.m_cost;

  if (memory_blocks < 2 * ARGON2_SYNC_POINTS * context^.lanes) then begin
      memory_blocks := 2 * ARGON2_SYNC_POINTS * context^.lanes;
  end;

  segment_length := memory_blocks div (context^.lanes * ARGON2_SYNC_POINTS);
  //* Ensure that all segments have equal length */
  memory_blocks := segment_length * (context^.lanes * ARGON2_SYNC_POINTS);

  instance.version := Targon2_version(context^.version);
  instance.memory := nil;
  instance.passes := context^.t_cost;
  instance.memory_blocks := memory_blocks;
  instance.segment_length := segment_length;
  instance.lane_length := segment_length * ARGON2_SYNC_POINTS;
  instance.lanes := context^.lanes;
  instance.threads := context^.threads;
  instance.type_ := type_;
  instance.print_internals := 0;
  instance.context_ptr := context;

  if (instance.threads > instance.lanes) then begin
      instance.threads := instance.lanes;
  end;

  //* 3. Initialization: Hashing inputs, allocating memory, filling first blocks */
  Result := initialize(@instance, context);

  // Nothing to release here: a failed initialisation means no allocation
  if (ARGON2_OK <> Result) then Exit;

  //* 4. Filling memory */
  Result := fill_memory_blocks(@instance);

  if (ARGON2_OK <> Result) then
  begin
    // finalize is skipped on this path, so the block memory has to be wiped
    // and released here to avoid leaking it.
    free_memory(pcuint8(instance.memory),
                instance.memory_blocks, sizeof(Tblock));
    Exit;
  end;

  //* 5. Finalization (also releases the block memory) */
  finalize(context, @instance);

  Result:= ARGON2_OK;
end;

(*
 * General Argon2 entry point: hashes pwd with salt, optional secret key and
 * optional associated data and copies the raw tag to hash.
 * @param t_cost      number of passes
 * @param m_cost      memory size in 1 KiB blocks
 * @param parallelism number of lanes, also used as the thread count
 * @param hash        destination of hashlen bytes; nothing is copied when nil
 * @param type_       Argon2 variant
 * @param version     algorithm version
 * @return ARGON2_OK on success;
 *         ARGON2_INCORRECT_PARAMETER when a length does not fit in 32 bit;
 *         ARGON2_MEMORY_ALLOCATION_ERROR when the tag buffer is unavailable;
 *         otherwise the error reported by argon2_ctx
 *)
function argon2_hash(const t_cost, m_cost, parallelism: cuint32;
                     const pwd: pansichar; const pwdlen: csize_t;
                     const salt: pansichar; const saltlen: csize_t;
                     const secret: pansichar; const secretlen: csize_t;
                     const ad: pansichar; const adlen: csize_t;
                     hash: Pointer; const hashlen: csize_t;
                     type_: Targon2_type; version: Targon2_version): cint;
var
  context: Targon2_context;
begin
  // The context stores every length as a 32-bit value. Silently truncating
  // a larger length would hash the wrong amount of data (and, for hashlen,
  // copy more bytes than were produced), so such requests are rejected.
{$IF SizeOf(csize_t) > SizeOf(cuint32)}
  if (pwdlen > High(cuint32)) or (saltlen > High(cuint32)) or
     (secretlen > High(cuint32)) or (adlen > High(cuint32)) or
     (hashlen > High(cuint32)) then
  begin
    Exit(ARGON2_INCORRECT_PARAMETER);
  end;
{$ENDIF}

  context.out_ := GetMem(hashlen);

  if (context.out_ = nil) then begin
    Exit(ARGON2_MEMORY_ALLOCATION_ERROR);
  end;

  // try/finally guarantees that the temporary tag is wiped and released on
  // every path, including an exception raised further down.
  try
    context.outlen := cuint32(hashlen);
    context.pwd := pcuint8(pwd);
    context.pwdlen := cuint32(pwdlen);
    context.salt := pcuint8(salt);
    context.saltlen := cuint32(saltlen);
    context.secret := pcuint8(secret);
    context.secretlen := cuint32(secretlen);
    context.ad := pcuint8(ad);
    context.adlen := cuint32(adlen);
    context.t_cost := t_cost;
    context.m_cost := m_cost;
    context.lanes := parallelism;
    context.threads := parallelism;
    context.flags := ARGON2_DEFAULT_FLAGS;
    context.version := cuint32(version);

    Result := argon2_ctx(@context, type_);

    //* if raw hash requested, write it */
    if (Result = ARGON2_OK) and (hash <> nil) then
    begin
      Move(context.out_^, hash^, hashlen);
    end;
  finally
    clear_internal_memory(context.out_, hashlen);
    FreeMem(context.out_);
  end;
end;

(*
 * Key derivation with Argon2id and the current default version: forwards to
 * argon2_hash without a secret key and without associated data.
 * @return the result of argon2_hash
 *)
function argon2id_kdf(const t_cost, m_cost, parallelism: cuint32;
                      const pwd: pansichar; const pwdlen: csize_t;
                      const salt: pansichar; const saltlen: csize_t;
                      hash: Pointer; const hashlen: csize_t): cint;
begin
  Exit(argon2_hash(t_cost, m_cost, parallelism,
                   pwd, pwdlen,
                   salt, saltlen,
                   nil, 0,    // no secret key
                   nil, 0,    // no associated data
                   hash, hashlen,
                   Argon2_id, ARGON2_VERSION_NUMBER));
end;

(*
 * Runs the built-in known-answer vectors and writes a report for each of
 * them, plus the overall result, to standard output.
 * The vectors are combined with "and", so with short-circuit evaluation the
 * vectors after the first failing one are not executed.
 * @return True when every executed vector matched
 *)
function argon2_selftest: Boolean;

  (*
   * Hashes pwd with salt using t passes, 2^m KiB of memory and p lanes and
   * compares the tag with the expected hex string (case-insensitive).
   * A vector only passes when argon2_hash reported success and the tag
   * matches; a failing return code is reported on its own line.
   *)
  function hash_test(version: Targon2_version; type_: Targon2_type; t, m, p: cuint32; pwd, salt, hex: String): Boolean;
  var
    Q: QWord;
    rc: cint;
    out_: String;
    out_hex: String;
    out_len: Integer;
  begin
      out_len:= Length(hex) div 2;
      WriteLn(Format('Hash test: $v=%d t=%d, m=%d, p=%d, pass=%s, salt=%s, result=%d',
              [version, t, m, p, pwd, salt, out_len]));

      SetLength(out_, out_len);
      Q:= GetTickCount64;
      // m is the base-2 logarithm of the memory size in KiB
      rc:= argon2_hash(t, 1 shl m, p, Pointer(pwd), Length(pwd), Pointer(salt), Length(salt),
                       nil, 0, nil, 0, Pointer(out_), out_len, type_, version);
      WriteLn('Time:   ', GetTickCount64 - Q);
      if (rc <> ARGON2_OK) then
        WriteLn('Error:  ', rc);
      SetLength(out_hex, out_len * 2);
      BinToHex(PAnsiChar(out_), PAnsiChar(out_hex), out_len);
      // Without the return code check a failed run would be judged on the
      // uninitialised contents of the output buffer.
      Result:= (rc = ARGON2_OK) and SameText(hex, out_hex);
      WriteLn('Must:   ', hex);
      WriteLn('Have:   ', out_hex);
      WriteLn('Result: ', Result);
      WriteLn('------------------------------------------------------------');
  end;

begin
  Result:= True;
  // Test Argon2i
  Result:= Result and hash_test(ARGON2_VERSION_10, Argon2_i, 2, 16, 1, 'password', 'somesalt',
                                'f6c4db4a54e2a370627aff3db6176b94a2a209a62c8e36152711802f7b30c694');
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_i, 2, 16, 1, 'password', 'somesalt',
                                'c1628832147d9720c5bd1cfd61367078729f6dfb6f8fea9ff98158e0d7816ed0');
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_i, 2, 16, 1, 'differentpassword', 'somesalt',
                                '14ae8da01afea8700c2358dcef7c5358d9021282bd88663a4562f59fb74d22ee');
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_i, 2, 16, 1, 'password', 'diffsalt',
                                'b0357cccfbef91f3860b0dba447b2348cbefecadaf990abfe9cc40726c521271');
  // Test Argon2d
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_d, 2, 16, 1, 'password', 'somesalt',
                                '955e5d5b163a1b60bba35fc36d0496474fba4f6b59ad53628666f07fb2f93eaf');
  // Test Argon2id
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_id, 2, 16, 1, 'password', 'somesalt',
                                '09316115d5cf24ed5a15a31a3ba326e5cf32edc24702987c02b6566f61913cf7');
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_id, 2, 16, 2, 'password', 'somesalt',
                                '6f681ac1c3384a90119d2763a683f9ac79532d999abfab5644aa8aafd3d0d234');
  // Recommended parameters (the running time about 125ms on Intel Core i5-7400 64 bit)
  Result:= Result and hash_test(ARGON2_VERSION_NUMBER, Argon2_id, 2, 16, 4,
                                'password','123456789012345678901234567890xy',
                                'c80142cbb6076b2d6be20137ddf24679cfc70eb4cde0f242a342e9e63636292eb2efcd907873fc19ca0bee0b7d7e992a7f68ce24a2da379bc41d5eb235f76eaa17220a6fa82d2d4a2e168b021dbfa5ba5a9f232ea0a1e24d');
  WriteLn('Result: ', Result);
end;

end.
